Load libraries

# Load libraries (install first if needed)
library(tidyverse); theme_set(theme_light(base_size = 12))
#> ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.1 ──
#> ✓ ggplot2 3.3.5     ✓ purrr   0.3.4
#> ✓ tibble  3.1.5     ✓ dplyr   1.0.7
#> ✓ tidyr   1.1.4     ✓ stringr 1.4.0
#> ✓ readr   2.1.1     ✓ forcats 0.5.1
#> ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
#> x dplyr::filter() masks stats::filter()
#> x dplyr::lag()    masks stats::lag()
library(tidylog)
#> 
#> Attaching package: 'tidylog'
#> The following objects are masked from 'package:dplyr':
#> 
#>     add_count, add_tally, anti_join, count, distinct, distinct_all,
#>     distinct_at, distinct_if, filter, filter_all, filter_at, filter_if,
#>     full_join, group_by, group_by_all, group_by_at, group_by_if,
#>     inner_join, left_join, mutate, mutate_all, mutate_at, mutate_if,
#>     relocate, rename, rename_all, rename_at, rename_if, rename_with,
#>     right_join, sample_frac, sample_n, select, select_all, select_at,
#>     select_if, semi_join, slice, slice_head, slice_max, slice_min,
#>     slice_sample, slice_tail, summarise, summarise_all, summarise_at,
#>     summarise_if, summarize, summarize_all, summarize_at, summarize_if,
#>     tally, top_frac, top_n, transmute, transmute_all, transmute_at,
#>     transmute_if, ungroup
#> The following objects are masked from 'package:tidyr':
#> 
#>     drop_na, fill, gather, pivot_longer, pivot_wider, replace_na,
#>     spread, uncount
#> The following object is masked from 'package:stats':
#> 
#>     filter
library(rnaturalearth)
library(rnaturalearthdata)
library(ggsidekick) # devtools::install_github("seananderson/ggsidekick")
library(RColorBrewer)
library(forcats)
library(viridis)
#> Loading required package: viridisLite
#> 
#> Attaching package: 'viridis'
#> The following object is masked from 'package:viridisLite':
#> 
#>     viridis.map
library(rnaturalearth)
library(rnaturalearthdata)
library(broom)
library(rgdal)
#> Loading required package: sp
#> rgdal: version: 1.5-18, (SVN revision 1082)
#> Geospatial Data Abstraction Library extensions to R successfully loaded
#> Loaded GDAL runtime: GDAL 3.1.4, released 2020/10/20
#> Path to GDAL shared files: /Users/maxlindmark/Library/R/4.0/library/rgdal/gdal
#> GDAL binary built with GEOS: TRUE 
#> Loaded PROJ runtime: Rel. 6.3.1, February 10th, 2020, [PJ_VERSION: 631]
#> Path to PROJ shared files: /Users/maxlindmark/Library/R/4.0/library/rgdal/proj
#> Linking to sp version:1.4-4
#> To mute warnings of possible GDAL/OSR exportToProj4() degradation,
#> use options("rgdal_show_exportToProj4_warnings"="none") before loading rgdal.
library(ggmap)
#> Google's Terms of Service: https://cloud.google.com/maps-platform/terms/.
#> Please cite ggmap if you use it! See citation("ggmap") for details.
library(sf)
#> Linking to GEOS 3.8.1, GDAL 3.1.4, PROJ 6.3.1
library(png)
library(patchwork)
library(ggridges)

Read data

d <- readr::read_csv("https://raw.githubusercontent.com/maxlindmark/perch_growth_gradient/master/data/for_analysis/dat.csv") %>% dplyr::select(-...1)
#> New names:
#> * `` -> ...1
#> Rows: 362198 Columns: 12
#> ── Column specification ────────────────────────────────────────────────────────
#> Delimiter: ","
#> chr (5): age_ring, area, gear, ID, sex
#> dbl (7): ...1, length_mm, age_bc, age_catch, catch_year, cohort, final_length
#> 
#> â„č Use `spec()` to retrieve the full column specification for this data.
#> â„č Specify the column types or set `show_col_types = FALSE` to quiet this message.

d
glimpse(d)
#> Rows: 362,198
#> Columns: 11
#> $ length_mm    <dbl> 62, 115, 153, 168, 184, 51, 110, 143, 155, 163, 71, 112, 

#> $ age_bc       <dbl> 1, 2, 3, 4, 5, 1, 2, 3, 4, 5, 1, 2, 3, 4, 1, 2, 3, 4, 5, 

#> $ age_catch    <dbl> 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 4, 4, 4, 4, 5, 5, 5, 5, 5, 

#> $ age_ring     <chr> "Y", "Y", "Y", "Y", "Y", "Y", "Y", "Y", "Y", "Y", "Y", "Y

#> $ area         <chr> "BT", "BT", "BT", "BT", "BT", "BT", "BT", "BT", "BT", "BT

#> $ catch_year   <dbl> 1977, 1977, 1977, 1977, 1977, 1977, 1977, 1977, 1977, 197

#> $ cohort       <dbl> 1972, 1972, 1972, 1972, 1972, 1972, 1972, 1972, 1972, 197

#> $ final_length <dbl> 184, 184, 184, 184, 184, 163, 163, 163, 163, 163, 178, 17

#> $ gear         <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N

#> $ ID           <chr> "1977_1_BT", "1977_1_BT", "1977_1_BT", "1977_1_BT", "1977

#> $ sex          <chr> "1", "1", "1", "1", "1", "1", "1", "1", "1", "1", "0", "0


# Use only length-at-age by filtering on age_ring
d <- d %>% filter(age_ring == "Y")
#> filter: removed 28,413 rows (8%), 333,785 rows remaining

Plot data

All data

ggplot(d, aes(age_bc, length_mm, color = area)) +
  geom_jitter(height = 0, alpha = 0.2)


ggplot(d, aes(age_bc, length_mm, color = area)) +
  geom_jitter(height = 0, alpha = 0.2) + 
  guides(color = "none") + 
  facet_wrap(~ area)

Sample locations & length of time series

# Map plot
theme_set(theme_light(base_size = 12))

# Read UTM function
LongLatToUTM <- function(x, y, zone){
  xy <- data.frame(ID = 1:length(x), X = x, Y = y)
  coordinates(xy) <- c("X", "Y")
  proj4string(xy) <- CRS("+proj=longlat +datum=WGS84")  ## for example
  res <- spTransform(xy, CRS(paste("+proj=utm +zone=",zone," ellps=WGS84",sep='')))
  return(as.data.frame(res))
}

# Specify ranges for big map
ymin = 52; ymax = 67; xmin = 11; xmax = 25

map_data <- rnaturalearth::ne_countries(
  scale = "medium",
  returnclass = "sf", continent = "europe")

# Crop the polygon for plotting and efficiency:
# st_bbox(map_data) # find the rough coordinates
swe_coast <- suppressWarnings(suppressMessages(
  st_crop(map_data,
          c(xmin = xmin, ymin = ymin, xmax = xmax, ymax = ymax))))

# Transform our map into UTM 33 coordinates, which is the equal-area projection we fit in:
utm_zone33 <- 32633
swe_coast_proj <- sf::st_transform(swe_coast, crs = utm_zone33)

# Add point to areas
sort(unique(d$area))
#>  [1] "BS"    "BT"    "FB"    "FM"    "HO"    "JM"    "MU"    "RA"    "SI_EK"
#> [10] "SI_HA" "TH"    "VN"

df <- data.frame(Area = c("Brunskar (BS)", "Biotest (BT)", "Finbo (FB)", "Forsmark (FM)",
                          "Holmon (HO)", "Kvadofjarden (JM)", "Musko (MU)", "Ranea (RA)",
                          "Simpevarp 1 (SI_EK", "Simpevarp 2 (SI_HA", "Torhamn (TH)", "Vino (VN)"),
                 lon = c(21.5, 18.1, 19.5, 18, 20.9, 16.8, 18.1, 22.3, 16.6, 16.7, 15.9, 16.9),
                 lat = c(60, 60.4, 60.3, 60.5, 63.7, 58, 59, 65.9, 57.3, 57.4, 56.1, 57.5))

# Add UTM coords
utm_coords <- LongLatToUTM(df$lon, df$lat, zone = 33)
#> Warning in showSRID(uprojargs, format = "PROJ", multiline = "NO", prefer_proj =
#> prefer_proj): Discarded datum Unknown based on WGS84 ellipsoid in CRS definition
df$X <- utm_coords$X
df$Y <- utm_coords$Y

# Crop the plot
xmin2 <- 330000; xmax2 <- 959000; xrange <- xmax - xmin
ymin2 <- 6000000; ymax2 <- 7300000; yrange <- ymax - ymin

ggplot(swe_coast_proj) +
  geom_sf() +
  coord_sf(xlim = c(xmin2, xmax2), ylim = c(ymin2, ymax2)) +
  geom_point(data = df, aes(x = X, y = Y, color = Area), size = 3) +
  labs(x = "Longitude", y = "Latitude") +
  scale_color_brewer(palette = "Paired") +
  NULL

Sample sizes

# Average sample size by ID
d %>% 
  group_by(cohort, area, ID) %>% 
  summarise(n = n()) %>% 
  ungroup() %>% 
  ggplot(., aes(x = n, y = area, n, fill = area)) +
  geom_density_ridges(stat = "binline", bins = 20, scale = 1, draw_baseline = FALSE, alpha = 0.8) +
  scale_fill_brewer(palette = "Paired") +
  guides(fill = "none") +
  NULL
#> group_by: 3 grouping variables (cohort, area, ID)
#> summarise: now 91,595 rows and 4 columns, 2 group variables remaining (cohort, area)
#> ungroup: no grouping variables


# Sample size by gear (some overlappning gears with different names)
d %>% 
  group_by(gear, area) %>% 
  summarise(n = n()) %>% 
  ggplot(., aes(factor(gear), n, fill = area)) +
  geom_bar(stat = "identity") +
  guides(fill = "none") +
  facet_wrap(~area, scales = "free") +
  scale_fill_brewer(palette = "Paired") +
  theme(axis.text.x = element_text(angle = 90)) +
  NULL
#> group_by: 2 grouping variables (gear, area)
#> summarise: now 37 rows and 3 columns, one group variable remaining (gear)


# Plot sample size by area and cohort (all length-at-ages)
d %>% 
  group_by(cohort, area) %>% 
  summarise(n = n()) %>% 
  ggplot(., aes(cohort, n, fill = area)) +
  geom_bar(stat = "identity") + 
  scale_fill_brewer(palette = "Paired") +
  NULL
#> group_by: 2 grouping variables (cohort, area)
#> summarise: now 505 rows and 3 columns, one group variable remaining (cohort)


d %>% 
  group_by(cohort, area) %>% 
  summarise(n = n()) %>% 
  ggplot(., aes(cohort, n, color = area)) +
  geom_line() + 
  scale_color_brewer(palette = "Paired") +
  facet_wrap(~area) +
  guides(color = "none") +
  theme(axis.text.x = element_text(angle = 90)) +
  NULL
#> group_by: 2 grouping variables (cohort, area)
#> summarise: now 505 rows and 3 columns, one group variable remaining (cohort)


# Plot sample size by area and catch_year
d %>% 
  group_by(catch_year, area) %>% 
  summarise(n = n()) %>% 
  ggplot(., aes(catch_year, n, fill = area)) +
  geom_bar(stat = "identity") + 
  scale_fill_brewer(palette = "Paired") +
  NULL
#> group_by: 2 grouping variables (catch_year, area)
#> summarise: now 371 rows and 3 columns, one group variable remaining (catch_year)


d %>% 
  group_by(catch_year, area) %>% 
  summarise(n = n()) %>% 
  ggplot(., aes(catch_year, n, color = area)) +
  geom_line() + 
  scale_color_brewer(palette = "Paired") +
  facet_wrap(~area) +
  guides(color = "none") +
  theme(axis.text.x = element_text(angle = 90)) +
  NULL
#> group_by: 2 grouping variables (catch_year, area)
#> summarise: now 371 rows and 3 columns, one group variable remaining (catch_year)

Time-slopes of length-at-age

# Calculate time-slopes by age and area
time_slopes_by_year_area <- d %>%
  group_by(age_bc, area) %>% # center length at age for comparison across ages
  mutate(length_mm_ct = length_mm / mean(length_mm)) %>% 
  ungroup() %>% 
  mutate(id = paste(age_bc, area, sep = ";")) %>%
  split(.$id) %>%
  purrr::map(~lm(length_mm_ct ~ catch_year, data = .x)) %>%
  purrr::map_df(broom::tidy, .id = 'id') %>%
  filter(term == 'catch_year') %>% 
  separate(id, into = c("age_bc", "area"), sep = ";") %>% 
  mutate(upr = estimate + std.error*2, lwr = estimate - std.error*2) %>% 
  mutate(id = paste(age_bc, area, sep = ";"))
#> group_by: 2 grouping variables (age_bc, area)
#> mutate (grouped): new variable 'length_mm_ct' (double) with 35,825 unique values and 0% NA
#> ungroup: no grouping variables
#> mutate: new variable 'id' (character) with 167 unique values and 0% NA
#> filter: removed 167 rows (50%), 167 rows remaining
#> mutate: new variable 'upr' (double) with 154 unique values and 9% NA
#>         new variable 'lwr' (double) with 154 unique values and 9% NA
#> mutate: new variable 'id' (character) with 167 unique values and 0% NA

time_slopes_by_year_area

# Add sample size so that we can filter on that
sample_size <- d %>% 
  group_by(age_bc, area) %>% 
  summarise(n = n()) %>% 
  ungroup() %>% 
  mutate(id = paste(age_bc, area, sep = ";")) %>% 
  dplyr::select(n, id)
#> group_by: 2 grouping variables (age_bc, area)
#> summarise: now 167 rows and 3 columns, one group variable remaining (age_bc)
#> ungroup: no grouping variables
#> mutate: new variable 'id' (character) with 167 unique values and 0% NA

# Join sample size
time_slopes_by_year_area <- left_join(time_slopes_by_year_area, sample_size)
#> Joining, by = "id"
#> left_join: added one column (n)
#>            > rows only in x     0
#>            > rows only in y  (  0)
#>            > matched rows     167
#>            >                 =====
#>            > rows total       167

# Plot effect sizes
time_slopes_by_year_area %>%
  filter(n > 30) %>% 
  ggplot(., aes(reorder(age_bc, as.numeric(age_bc)), estimate, color = factor(area))) + 
  geom_point(position = position_dodge(width = 0.4)) +
  geom_errorbar(aes(ymin = lwr, ymax = upr), width = 0.2,
                position = position_dodge(width = 0.4)) +
  geom_hline(yintercept = 0, linetype = "dashed") +
    scale_color_brewer(palette = "Paired") + 
  facet_wrap(~factor(area), scales = "free") + 
  labs(x = "Age", y = "slope: size~time") +
  theme(legend.position = "bottom")
#> filter: removed 53 rows (32%), 114 rows remaining

  NULL
#> NULL

time_slopes_by_year_area %>%
  filter(n > 30) %>% 
  ggplot(., aes(reorder(age_bc, as.numeric(age_bc)), estimate, color = factor(area))) + 
  geom_point(position = position_dodge(width = 0.4)) +
  geom_errorbar(aes(ymin = lwr, ymax = upr), width = 0.2,
                position = position_dodge(width = 0.4)) +
  geom_hline(yintercept = 0, linetype = "dashed") +
  scale_color_brewer(palette = "Paired") + 
  labs(x = "Age", y = "slope: size~time") +
  NULL
#> filter: removed 53 rows (32%), 114 rows remaining

  
time_slopes_by_year_area %>%
  filter(n > 30) %>% 
  ggplot(., aes(as.numeric(age_bc), estimate)) + 
  geom_point(position = position_dodge(width = 0.4)) +
  geom_hline(yintercept = 0, linetype = "dashed") +
  stat_smooth(method = "gam", formula = y ~ s(x, k = 3)) +
  labs(x = "Age", y = "slope: size~time")
#> filter: removed 53 rows (32%), 114 rows remaining

  NULL  
#> NULL